###################################################################
##                            GOV-2001                           ##
##                          Project Code                         ##
##                          Respiratory                          ##
#----------------------------------------------------------------##
# created April 30, 2019                                         ##
# by Dan Engelberg and Charlotte C. Wagner                       ##
###################################################################
# NOTES: Running this code requires installation of the package AER
# load modules
# load modules
library(haven)                                           
library(AER)
library(stargazer)
library(ggplot2)
library(VARtests)
library(orcutt)
library(mvtnorm)
library(gridExtra)
# set working directory (modify)
setwd('C:/Users/denge/OneDrive/Documents/DUSP/GOV2001/replicationPaper/Submission_replication/Submission_replication')

# read data
data <- read_dta("data_weekly_all.dta")

#normalise to std
data$frp_norm = data$fire_frp_sum/sd(data$fire_frp_sum, na.rm=T)
# std normalised pollution standards index (psi)
data$psi_norm = data$psi_avg/sd(data$psi_avg, na.rm=T)
# std normalised respiraroty tract infections
data$resp_norm = data$acuteupperrespiratorytractinfect/sd(data$acuteupperrespiratorytractinfect, na.rm=T)
# std normalisedconjunctivitis
data$conj_norm = data$acuteconjunctivitis/sd(data$acuteconjunctivitis, na.rm=T)
# std normalised chickenpox incidence
data$cknpox_norm = data$chickenpox/sd(data$chickenpox, na.rm=T)
# interaction term for fire radiative power and wind
data$frp_windspeed = data$frp_norm*data$meanwindspeedkmh
data$fire_windspeed = data$fire_frp_sum*data$meanwindspeedkmh
# add column with lag
data$frp_norm_l1 = c(NA, data$frp_norm[1:(nrow(data) - 1)])
data$frp_windspeed_l1 = c(NA, data$frp_windspeed[1:(nrow(data) - 1)])

data$fire_frp_sum_l1=c(NA, data$fire_frp_sum[1:(nrow(data) - 1)])
data$fire_frp_sum_windspeed_l1=c(NA, data$fire_windspeed[1:(nrow(data) - 1)])

data$log_frp_sum=log(data$fire_frp_sum)

# trend/seasonal/random temp column
trend.dec=ts(data$maximumtemperaturec, frequency=52)
decomp=decompose(trend.dec, "additive")
data$maximumtemperaturec.trend=decomp$trend
data$maximumtemperaturec.seas=decomp$seasonal
data$maximumtemperaturec.rand=decomp$random

# add seasons variable
data$seasons=data$month
data$seasons[data$month==12]='wet'
data$seasons[data$month==1]='wet'
data$seasons[data$month==2]='wet'
data$seasons[data$month==3]='wet'
data$seasons[data$month==4]='wet'
data$seasons[data$month==5]='wet'
data$seasons[data$month==6]='wet'

data$seasons[data$month==11]='dry'
data$seasons[data$month==10]='dry'
data$seasons[data$month==9]='dry'
data$seasons[data$month==8]='dry'
data$seasons[data$month==7]='dry'

# change into 0 and 1s
data$seasons[data$seasons=="wet"]=0
data$seasons[data$seasons=="dry"]=1


# log-lag
data$acuteupperrespiratorytractinfect_ll1=log(c(NA, data$acuteupperrespiratorytractinfect[1:(nrow(data) - 1)]))

# training data
training.data=(data[1:(343-78),])

# test data
test.data=(data[(343-78):343,])

# plot frp against month to derive seasons
postscript("plots/Frp_month.eps", horizontal=FALSE, onefile=FALSE, paper="special",
           point=9, height=2, width=3.5)
par(mfrow=c(1,1))
plot(data$month, data$fire_frp_sum, ylab='Fire radiative power', xlab='Month')
dev.off()

# plot histogram of outcome
postscript("plots/Hist_data.eps", horizontal=FALSE, onefile=FALSE, paper="special",
           point=9, height=5, width=3.5)
par(mfrow=c(2,1)) 
p1=hist(data$acuteconjunctivitis, main='Acute Conjunctivitis', xlab='N per week')
p2=hist(data$acuteupperrespiratorytractinfect, main='Acute Respiratory Tract Infections', xlab='N per week')
dev.off()

# Run original simulation
full.glm.model.ARTI = glm(resp_norm ~ 0* frp_norm + frp_windspeed + 
                            dailyrainfalltotalmm + meantemperaturec + 
                            maximumtemperaturec + minimumtemperaturec + 
                            meanwindspeedkmh + maxwindspeedkmh + psi_change +
                            acutediarrhoea+ as.factor(month)+ as.factor(year), 
                          data=training.data,
                          family=gaussian)

test1.data=test.data
test1.data$year=2015
predict.test=predict(full.glm.model.ARTI, newdata = test1.data,
                     type="response",se.fit=TRUE)

# change to poisson model
noff.glm.model.ARTI.pois = glm(round(acuteupperrespiratorytractinfect) ~  fire_frp_sum+ 
                                 fire_windspeed+ 
                                    dailyrainfalltotalmm +
                                    meantemperaturec +
                                    maximumtemperaturec +
                                    minimumtemperaturec +
                                    meanwindspeedkmh + maxwindspeedkmh + 
                                    psi_change + acutediarrhoea, training.data,
                                  family=poisson)
predict.noff.pois.test=predict(noff.glm.model.ARTI.pois, newdata = test.data,
                                  type="response",se.fit=TRUE)

# add lags
noff.glm.model.ARTI.l1.pois = glm(round(acuteupperrespiratorytractinfect) ~   fire_frp_sum + fire_frp_sum_windspeed_l1 + 
                                    fire_windspeed+ fire_frp_sum_windspeed_l1+ 
                                    dailyrainfalltotalmm +
                                    meantemperaturec +
                                    maximumtemperaturec +
                                    minimumtemperaturec +
                                    meanwindspeedkmh + maxwindspeedkmh + 
                                    psi_change + acutediarrhoea, training.data,
                                  family=poisson)
predict.noff.l1.pois.test=predict(noff.glm.model.ARTI.l1.pois, newdata = test.data,
                                  type="response",se.fit=TRUE)


# add trend/seasonal/rand temp variable
noff.glm.model.ARTI.l1.pois.sep = glm(round(acuteupperrespiratorytractinfect) ~  fire_frp_sum + fire_windspeed +
                                        dailyrainfalltotalmm +
                                        meantemperaturec +
                                        maximumtemperaturec.trend  + maximumtemperaturec.seas + maximumtemperaturec.rand+
                                        minimumtemperaturec +
                                        meanwindspeedkmh + maxwindspeedkmh + 
                                        psi_change + acutediarrhoea, training.data,
                                      family=poisson)
predict.noff.l1.pois.sep.test=predict(noff.glm.model.ARTI.l1.pois.sep, newdata = test.data,
                                      type="response",se.fit=TRUE)


# add seasonal fixed effect
glm.model.ARTI.pois.sep.seas = glm(round(acuteupperrespiratorytractinfect) ~ fire_frp_sum + fire_windspeed +
                                     dailyrainfalltotalmm + meantemperaturec +
                                     maximumtemperaturec.trend + maximumtemperaturec.seas + maximumtemperaturec.rand+
                                     minimumtemperaturec + meanwindspeedkmh + maxwindspeedkmh + 
                                     psi_change + acutediarrhoea +factor(seasons), training.data,
                                   family=poisson)

predict.pois.sep.seas.test=predict(glm.model.ARTI.pois.sep.seas, newdata = test.data,
                                   type="response", se.fit=TRUE)

# plot residuals
postscript("plots/Residuals_ARTI.eps", horizontal=FALSE, onefile=FALSE, paper="special",
           point=9, height=5, width=3.5)
par(mfrow=c(2,1)) 
# plot residuals
plot(y=glm.model.ARTI.pois.sep.seas$residuals, x=glm.model.ARTI.pois.sep.seas$fitted.values, 
     main='Acute Conjunctivitis', xlab='Fitted values', ylab='Residuals')
# plot autocorrelation
plot(y=glm.model.ARTI.pois.sep.seas$residuals, x=seq(1, length(glm.model.ARTI.pois.sep.seas$residuals)), 
     xlab='Week', ylab='Residuals')
dev.off()



postscript("plots/Forecast_ARTI.eps", horizontal=FALSE, onefile=FALSE, paper="special",
           point=9, height=2, width=3.25)
plot1=ggplot()
plot1+geom_line(aes(x=1:343, y=data$resp_norm*sd(data$acuteupperrespiratorytractinfect, na.rm=T)))+
  geom_line(aes(x=(343-78):343, y=predict.test$fit*sd(data$acuteupperrespiratorytractinfect, na.rm=T)), col='red')+
  geom_line(aes(x=(343-78):343, 
                y=predict.noff.pois.test$fit), linetype = "dashed", col='green')+
  geom_line(aes(x=(343-78):343, 
                y=predict.noff.l1.pois.test$fit), linetype = "dashed", col='blue')+
  geom_line(aes(x=(343-78):343, 
                y=predict.noff.l1.pois.sep.test$fit), linetype = "dashed", col='purple')+
  geom_line(aes(x=(343-78):343, 
                y=predict.pois.sep.seas.test$fit), linetype = "dashed", col='orange')+
  theme_bw()+ylab('N of clinic visits')+xlab('Week')+xlim(c(1,325))
dev.off()

postscript("plots/Forecast_zoom_ARTI2.eps", horizontal=FALSE, onefile=FALSE, paper="special",
           point=9, height=2, width=7)
plot2=ggplot()
plot2+geom_line(aes(x=(343-78):343, y=(data$resp_norm*sd(data$acuteupperrespiratorytractinfect, na.rm=T))[265:343], color='Data'))+
  geom_line(aes(x=(343-78):343, y=predict.test$fit*sd(data$acuteupperrespiratorytractinfect, na.rm=T), col='Sheldon et al.'))+
  geom_line(aes(x=(343-78):343, 
                y=predict.noff.pois.test$fit, linetype = "dashed", col='Model 1'), linetype = "dashed")+
  geom_line(aes(x=(343-78):343, 
                y=predict.noff.l1.pois.test$fit, linetype = "dashed", col='Model 2'), linetype = "dashed")+
  geom_line(aes(x=(343-78):343, 
                y=predict.noff.l1.pois.sep.test$fit, col='Model 3'), linetype = "dashed")+
  geom_line(aes(x=(343-78):343, 
                y=predict.pois.sep.seas.test$fit, linetype = "dashed", col='Model 4'), linetype = "dashed")+
  theme_bw()+ylab('N of clinic visits')+xlab('Week')+xlim(c(265,(343-27)))+
  scale_color_manual(values = c(
  'Data' = 'black',
  'Sheldon et al.' = 'red',
  'Model 1' = '#CC3333',
  'Model 2' = "#E69F00",
  'Model 3' = "purple",
  'Model 4' = "darkgreen"))+   
  theme(panel.background = element_rect(fill = "white", colour = "grey50"),
                                     strip.background =element_blank(),
                                    legend.title = element_blank(),
                                     strip.text = element_text( size=11),
                                     legend.position='',
        plot.title = element_text(hjust = 0.5))+
  ggtitle('b) Acute Respiratory Tract Infections')
dev.off()


####################################################################################
# make Chart to compare mod. performance
#####################################################################################

##Charting fit:
#observed values
obs.resp <- tail(data$acuteupperrespiratorytractinfect, 79)

#Culmulative clinical visits v. predicted clinical visits
cumulative.predict <- c(sum((predict.test$fit*sd(data$acuteupperrespiratorytractinfect, na.rm=T))[1:53], na.rm=T) ,
                        sum(predict.noff.pois.test$fit[1:53], na.rm = T),
                        sum(predict.noff.l1.pois.test$fit[1:53], na.rm = T),
                        sum(predict.noff.l1.pois.sep.test$fit, na.rm = T),
                        sum(predict.pois.sep.seas.test$fit, na.rm = T))
cumulative.error <- cumulative.predict - sum(obs.resp[1:53], na.rm = T)        
cumulative.miss.rate <- cumulative.error/sum(obs.resp[1:53], na.rm = T)

#mean absolute error
abs.error <- c(sum((abs(predict.test$fit*sd(data$acuteupperrespiratorytractinfect, na.rm=T) - obs.resp))[1:53], na.rm = T),
               sum(abs(predict.noff.pois.test$fit[1:53] - obs.resp[1:53]), na.rm = T),
               sum(abs(predict.noff.l1.pois.test$fit[1:53] - obs.resp[1:53]), na.rm = T),
               sum(abs(predict.noff.l1.pois.sep.test$fit[1:53] - obs.resp[1:53]), na.rm = T),
               sum(abs(predict.pois.sep.seas.test$fit[1:53] - obs.resp[1:53]), na.rm = T))
abs.error.mean <- abs.error/53

#mean squared error
sq.error <- c(sum((((predict.test$fit*sd(data$acuteupperrespiratorytractinfect, na.rm=T))[1:53] - obs.resp[1:53])^2), na.rm = T),
              sum((predict.noff.pois.test$fit[1:53] - obs.resp[1:53])^2, na.rm = T),
              sum((predict.noff.l1.pois.test$fit[1:53] - obs.resp[1:53])^2, na.rm = T),
              sum((predict.noff.l1.pois.sep.test$fit[1:53] - obs.resp[1:53])^2, na.rm = T),
              sum((predict.pois.sep.seas.test$fit[1:53] - obs.resp[1:53])^2, na.rm = T))
sq.error.mean <- sq.error/53

#creating fit table for publication
fit.table <- data.frame(round(cbind(rep(sum(obs.resp, na.rm = T), 5), cumulative.predict, cumulative.miss.rate,
                                    abs.error.mean, sq.error.mean),2))
family <- c("Gauss.", "Poisson", "Poisson", "Poisson", "Poisson")
fix.effects <- c("month, year", "no", "no", "no", "season")
lagged.ind <- c("no", "no", "yes", "no", "no")
trend.sep <- c("no", "no", "no", "yes", "yes")
fit.table <- cbind(family, fix.effects, lagged.ind, trend.sep, fit.table)
names(fit.table) <- c("model family", "fixed effects", "lagged ind.",
                      "trend seperate", "cum. obs. visits", "cum. predicted",
                      "difference rate", "mean abs. error", "mean sq. error")
fit.table <- t(fit.table)
stargazer(fit.table, summary = FALSE, title = "Specifications Tested")


####################################################################################
# Wind vs. Forest fire
#####################################################################################
glm.model.ARTI.pois.sep.seas = glm(round(acuteupperrespiratorytractinfect) ~ fire_frp_sum + fire_windspeed +
                                                                 dailyrainfalltotalmm + meantemperaturec +
                                                                 maximumtemperaturec.trend + maximumtemperaturec.seas + maximumtemperaturec.rand+
                                                                 minimumtemperaturec + meanwindspeedkmh + maxwindspeedkmh + 
                                                                 psi_change + acutediarrhoea +factor(seasons), data,
                                                               family=poisson)
### plotting respiratory effects
temp.data <- data
temp.data$seasons <- 0 #dry season has the higher level of fire
mean.data <- data.frame(t(colMeans(temp.data, na.rm = T)))
fire_sequence <- seq(min(data$fire_frp_sum),max(data$fire_frp_sum), 1000)
data.length <- length(fire_sequence)
mean.data.lowW <- mean.data
mean.data.lowW$seasons <- 0

#Effect of fire - 2 sd wind

mean.data.lowW$meanwindspeedkmh <- mean(data$meanwindspeedkmh) - 2 * sd(data$meanwindspeedkmh)
resp.lowW <-  data.frame(t(sapply(seq(min(data$fire_frp_sum),max(data$fire_frp_sum), 1000), FUN = function(i){
  mean.data.lowW$fire_frp_sum <- i
  mean.data.lowW$fire_windspeed <- mean.data.lowW$fire_frp_sum * mean.data.lowW$meanwindspeedkmh
  predict.lowW <- predict(glm.model.ARTI.pois.sep.seas, newdata = mean.data.lowW,
                          type="response",se.fit=TRUE)
  c(predict.lowW$fit, predict.lowW$se.fit)
})))
names(resp.lowW) <- c("fit", "se")

#Effect of fire at mean wind

mean.data.meanW <- data.frame(t(colMeans(temp.data, na.rm = T)))
resp.meanW <- data.frame(t(sapply(seq(min(data$fire_frp_sum),max(data$fire_frp_sum), 1000), FUN = function(i){
  mean.data.meanW$fire_frp_sum <- i
  mean.data.meanW$fire_windspeed <- mean.data.meanW$fire_frp_sum * mean.data.meanW$meanwindspeedkmh
  predict.meanW <- predict(glm.model.ARTI.pois.sep.seas, newdata = mean.data.meanW,
                           type="response",se.fit=TRUE)
  c(predict.meanW$fit, predict.meanW$se.fit)
})))
names(resp.meanW) <- c("fit", "se")

#effect of fire + 2 sd wind

mean.data.highW <- data.frame(t(colMeans(temp.data, na.rm = T)))
mean.data.highW$meanwindspeedkmh <- mean(data$meanwindspeedkmh) + 2 * sd(data$meanwindspeedkmh)
resp.highW <- data.frame(t(sapply(seq(min(data$fire_frp_sum),max(data$fire_frp_sum), 1000), FUN = function(i){
  mean.data.highW$fire_frp_sum <- i
  mean.data.highW$fire_windspeed <- mean.data.highW$fire_frp_sum * mean.data.highW$meanwindspeedkmh
  predict.highW <- predict(glm.model.ARTI.pois.sep.seas, newdata = mean.data.highW,
                           type="response",se.fit=TRUE)
  c(predict.highW$fit, predict.highW$se.fit)
})))
names(resp.highW) <- c("fit", "se")

#data frames for plotting

df.windvsfire_high=data.frame(
  fire=seq(min(data$fire_frp_sum),max(data$fire_frp_sum), 1000),
  mean=resp.highW$fit,
  minwind=resp.highW$fit-1.96*resp.highW$se,
  maxwind=resp.highW$fit+1.96*resp.highW$se
)

df.windvsfire_low=data.frame(
  fire=seq(min(data$fire_frp_sum),max(data$fire_frp_sum), 1000),
  mean=resp.lowW$fit,
  minwind=resp.lowW$fit-1.96*resp.lowW$se,
  maxwind=resp.lowW$fit+1.96*resp.lowW$se
)

df.windvsfire_mean=data.frame(
  fire=seq(min(data$fire_frp_sum),max(data$fire_frp_sum), 1000),
  mean=resp.meanW$fit,
  minwind=resp.meanW$fit-1.96*resp.meanW$se,
  maxwind=resp.meanW$fit+1.96*resp.meanW$se
)

df.windvsfire=data.frame(
  rbind(df.windvsfire_low, df.windvsfire_high,
        df.windvsfire_mean)
)

df.windvsfire=cbind(rbind(df.windvsfire_low, df.windvsfire_high,
      df.windvsfire_mean), Type=c(rep('low', 885), rep('high', 885), rep('mean', 885)))

#plotting effects of fire at different wind speeds

postscript("plots/Wind_vs_Fire_ARTI2.pdf", horizontal=FALSE, onefile=FALSE, paper="special",
           point=9, height=2, width=3.5)
plot3=ggplot(df.windvsfire)
plot3+geom_line(aes(x=fire, y=mean, color=Type))+
  geom_ribbon(aes(x=fire, ymin=minwind, ymax=maxwind, fill=Type), alpha=0.3)+
  theme_bw()+ geom_rug(data=data, mapping=aes(x=data$fire_frp_sum))+
  theme(panel.background = element_rect(fill = "white", colour = "grey50"),
        strip.background =element_blank(),
        legend.title = element_blank(),
        strip.text = element_text( size=11),
        legend.position=NULL,
        plot.title = element_text(hjust = 0.5))+
  ylab('Clinic visits')+xlab('Fire radiative power')+
  ggtitle('ARTI')+
  scale_color_manual(values = c('#CC0000', '#0066CC', '#999999'))+
  scale_fill_manual(values = c('#CC0000', '#0066CC', '#999999'))
dev.off()  
####################################################################################
# First differences
#####################################################################################

#mean meanwindspeed

mean.data.meanW$fire_frp_sum <- mean(data$fire_frp_sum, na.rm = T)
fire_predict <- predict(glm.model.ARTI.pois.sep.seas, newdata = mean.data.meanW,
                        type="response",se.fit=TRUE)
fire_predict_low <- resp.meanW$fit[1]
fire_predict_high <- resp.meanW$fit[length(resp.meanW$fit)]


#low meanwindspeed

mean.data.lowW$fire_frp_sum <- mean(data$fire_frp_sum, na.rm = T)
mean.data.lowW$fire_windspeed <- mean.data.lowW$fire_frp_sum * mean.data.lowW$meanwindspeedkmh
fire_predict.lw <- predict(glm.model.ARTI.pois.sep.seas, newdata = mean.data.lowW,
                           type="response",se.fit=TRUE)
fire_predict_low.lw <- resp.lowW$fit[1]
fire_predict_high.lw <- resp.lowW$fit[length(resp.lowW$fit)] 

#high meanwindspeed

mean.data.highW$fire_frp_sum <- mean(data$fire_frp_sum, na.rm = T)
mean.data.highW$fire_windspeed <- mean.data.highW$fire_frp_sum * mean.data.highW$meanwindspeedkmh
fire_predict.hw <- predict(glm.model.ARTI.pois.sep.seas, newdata = mean.data.highW,
                           type="response",se.fit=TRUE)
fire_predict_low.hw <- resp.highW$fit[1]
fire_predict_high.hw <- resp.highW$fit[length(resp.highW$fit)] 


####################################################################################
# Simulating impact
#####################################################################################

set.seed(02138)
# simulate beta
sim.beta<-rmvnorm(5000,
                  mean=coef(glm.model.ARTI.pois.sep.seas),
                  sigma=vcov(glm.model.ARTI.pois.sep.seas))

# set covariates to observed value
cov.obs=model.matrix(glm.model.ARTI.pois.sep.seas)

cov.obs.null=cov.obs
v <- min(model.matrix(glm.model.ARTI.pois.sep.seas)[,2])
cov.obs.null[,2] = v
cov.obs.null[,3] = v * (model.matrix(glm.model.ARTI.pois.sep.seas)[,9])

# compute lambda across time
difference.over.time <- t(sapply(1:5000, FUN = function(i){
  obs.predict <- exp(sim.beta[i,]%*%t(cov.obs))
  null.predict <- exp(sim.beta[i,]%*%t(cov.obs.null))
  obs.predict - null.predict
}))
diff.quant <- t(sapply(1:291, FUN = function(i){
  quantile(difference.over.time[,i], c(.025,.5,.975))
}))


# plot
pdf("plots/fire_visits_resp.pdf",
    point=9, height=2, width=3.25)
plot1=ggplot()
plot1+geom_hline(yintercept=quantile(diff.quant[,2], .95), linetype="dashed")+
      geom_ribbon(aes(x=26:316,
                      ymin = diff.quant[,1], ymax = diff.quant[,3]), fill = "black", alpha=0.5)+
      geom_line(aes(x=26:316,
                    y=diff.quant[,2]),  col='red')+theme_bw()+
  ylab('N of clinic visits')+xlab('Week')+scale_y_continuous()
dev.off()

### Total visits throughout study
colSums(diff.quant)
colSums(diff.quant)[2]/sum(data$acuteupperrespiratorytractinfect, na.rm = T)
clinical.account <- diff.quant[,2]/data$acuteupperrespiratorytractinfect[27:317]

#Once per year events
quantile(clinical.account, .98)
#Max events
max(clinical.account)


# plot frp against month to derive seasons
layer_rect1 <- geom_rect(
      data = data, 
      aes(xmin = 6.5, xmax = 11.5, ymin = -Inf, ymax = Inf), 
      fill='lightpink'
)

postscript("plots/Frp_month_resp.pdf", horizontal=FALSE, onefile=FALSE, paper="special",
           point=9, height=2, width=3.5)
p1=ggplot()
p1+layer_rect1+
      geom_point(aes(x=as.integer(data$month), y=data$fire_frp_sum))+
      ylab('Fire radiative power')+
      xlab('Month')+theme_bw()+
      scale_x_discrete(limits=1:12, labels=1:12 )
dev.off()






